範本:http://graphics.wsj.com/infectious-diseases-and-vaccines/
library(readr)
Polio<-read_csv("https://raw.githubusercontent.com/CGUIM-BigDataAnalysis/BigDataCGUIM/master/104/POLIO_Incidence.csv")
## Rows: 2184 Columns: 53
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (51): ALABAMA, ALASKA, ARIZONA, ARKANSAS, CALIFORNIA, COLORADO, CONNECTI...
## dbl (2): YEAR, WEEK
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
查看資料長相
Polio
| YEAR | WEEK | ALABAMA | ALASKA | ARIZONA | ARKANSAS | CALIFORNIA | COLORADO | CONNECTICUT | DELAWARE | DISTRICT OF COLUMBIA | FLORIDA | GEORGIA | HAWAII | IDAHO | ILLINOIS | INDIANA | IOWA | KANSAS | KENTUCKY | LOUISIANA | MAINE | MARYLAND | MASSACHUSETTS | MICHIGAN | MINNESOTA | MISSISSIPPI | MISSOURI | MONTANA | NEBRASKA | NEVADA | NEW HAMPSHIRE | NEW JERSEY | NEW MEXICO | NEW YORK | NORTH CAROLINA | NORTH DAKOTA | OHIO | OKLAHOMA | OREGON | PENNSYLVANIA | RHODE ISLAND | SOUTH CAROLINA | SOUTH DAKOTA | TENNESSEE | TEXAS | UTAH | VERMONT | VIRGINIA | WASHINGTON | WEST VIRGINIA | WISCONSIN | WYOMING |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1928 | 1 | 0 | - | 0 | 0 | 0.17 | 0.39 | 0 | 0 | - | 0 | 0.03 | - | 0 | 0.03 | 0.03 | 0.08 | 0 | 0 | 0 | 0 | 0.06 | 0.14 | 0.04 | 0 | 0 | 0.03 | 0.18 | 0.07 | - | - | 0.08 | 0 | 0.08 | 0 | - | 0.02 | 0 | 0.64 | 0 | 0 | 0.06 | 0 | 0.04 | 0.05 | 0 | 0 | - | 0.26 | 0.06 | 0.03 | 0 |
| 1928 | 2 | 0 | - | 0 | 0 | 0.15 | 0.2 | 0 | 0 | - | 0 | 0 | - | 0 | 0.01 | 0.03 | - | 0.22 | 0 | 0.05 | 0.13 | 0.06 | 0.14 | 0.04 | 0.04 | 0 | 0.06 | 0 | 0.07 | - | - | 0.03 | 0 | 0.05 | 0.03 | 0.45 | - | 0.04 | 0.43 | 0.03 | 0 | 0.06 | 0 | 0.04 | 0.04 | 0 | 0 | - | 0.39 | 0.24 | 0.03 | 0 |
| 1928 | 3 | 0.04 | - | 0 | 0 | 0.11 | 0 | 0.06 | 0 | - | 0 | - | - | 0 | 0.03 | 0.03 | - | 0 | 0 | 0 | 0 | 0 | 0.07 | 0.02 | 0 | 0 | 0.03 | 0.18 | 0 | - | - | 0 | 0 | 0.03 | 0 | 0 | 0.06 | 0 | 1.07 | 0.02 | 0 | 0.35 | 0 | 0 | 0 | 0 | 0 | - | 0.13 | 0.12 | 0.03 | 0 |
| 1928 | 4 | 0 | - | 0.24 | 0.11 | 0.07 | 0.2 | 0.06 | 0 | 0 | 0 | 0 | - | 0 | 0.05 | 0.12 | 0 | 0 | 0 | 0 | 0 | 0 | 0.02 | 0.02 | 0 | 0 | 0.06 | 0 | 0 | - | 0 | 0.03 | 0 | 0.06 | 0 | 0.15 | 0 | 0.09 | 0.53 | 0.02 | 0 | 0.23 | 0 | 0.04 | 0.05 | 0 | 0 | - | 0.06 | 0.12 | 0 | 0 |
| 1928 | 5 | 0 | - | 0.24 | 0 | 0.32 | 0 | 0.13 | 0 | 0 | 0 | 0 | - | 0 | 0.04 | 0 | 0.04 | 0 | 0 | 0 | 0.38 | 0.12 | 0.02 | 0.04 | 0 | 0 | 0 | 0 | 0.15 | - | 0 | 0.03 | 0.48 | 0.07 | 0 | 0 | 0.03 | 0 | 0.32 | 0 | 0 | 0.17 | 0.15 | 0 | 0.05 | 0 | 0 | - | 0.13 | 0.06 | 0.03 | 0 |
| 1928 | 6 | 0 | - | 0 | 0 | 0.22 | 0.1 | 0 | 0 | 0 | 0 | 0 | - | - | 0.03 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.05 | 0.06 | 0 | 0 | 0 | 0 | 0.07 | - | 0 | 0 | 0 | 0.03 | 0 | 0 | 0.05 | 0.04 | 0.21 | 0.04 | 0 | 0.06 | 0.29 | 0.04 | 0 | 0.2 | 0 | 0.04 | 0.06 | 0 | 0.14 | 0 |
| 1928 | 7 | 0.08 | - | 0 | 0 | 0.13 | 0 | 0 | 0 | 0 | 0 | 0 | - | 0.22 | 0.01 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | 0 | 0 | 0 | 0 | 0 | 0 | - | 0.21 | 0.03 | 0.24 | 0.02 | 0.03 | 0.15 | 0.05 | 0.04 | 0.32 | 0.01 | 0 | 0 | 0 | 0 | 0 | 0.4 | 0 | - | 0 | 0 | 0.07 | 0 |
| 1928 | 8 | 0.11 | - | 0 | 0 | 0.11 | 0 | 0 | 0 | - | 0.14 | 0 | - | 0 | 0.01 | 0 | 0.04 | 0 | 0 | 0.05 | 0 | 0 | 0.05 | 0.02 | 0.04 | 0.05 | 0 | 0 | 0 | - | - | 0 | 0.48 | 0.03 | 0 | 0 | 0.03 | 0.17 | 0.11 | 0.01 | 0 | 0.06 | 0.15 | 0 | 0 | 0 | 0 | - | 0.06 | 0 | 0.03 | 0 |
| 1928 | 9 | 0 | - | 0 | 0 | 0.15 | 0 | 0.06 | 0 | 0 | 0 | 0 | - | 0.22 | 0.01 | 0 | 0 | 0 | 0 | 0.1 | 0 | 0.12 | 0.14 | 0 | 0 | 0.1 | 0 | 0 | 0.07 | - | 0.21 | 0 | 0 | 0.03 | 0.03 | 0.15 | 0.02 | 0.04 | 0.32 | 0.02 | 0 | 0.12 | 0 | 0 | 0 | 0 | 0 | 0.04 | 0.26 | 0.06 | 0 | 0 |
| 1928 | 10 | 0 | - | 0 | 0 | 0.11 | 0.1 | 0 | 0 | - | 0.07 | 0 | - | 0 | 0.04 | 0.03 | 0 | 0.05 | 0 | 0 | 0.25 | 0 | 0.02 | 0 | 0 | 0 | 0.03 | - | 0.15 | - | 0 | 0 | 0 | 0.04 | 0 | 0.15 | 0 | 0.04 | 0.21 | 0 | 0 | 0.06 | 0 | 0 | 0 | 0 | 0 | - | 0.06 | 0.06 | 0 | 0 |
library(tidyr)
PolioLong<-pivot_longer(Polio,
cols = ALABAMA:WYOMING, #同c(-"YEAR",-"WEEK")
names_to = "State")
查看轉成長表後的資料長相
PolioLong
| YEAR | WEEK | State | value |
|---|---|---|---|
| 1928 | 1 | ALABAMA | 0 |
| 1928 | 1 | ALASKA | - |
| 1928 | 1 | ARIZONA | 0 |
| 1928 | 1 | ARKANSAS | 0 |
| 1928 | 1 | CALIFORNIA | 0.17 |
| 1928 | 1 | COLORADO | 0.39 |
| 1928 | 1 | CONNECTICUT | 0 |
| 1928 | 1 | DELAWARE | 0 |
| 1928 | 1 | DISTRICT OF COLUMBIA | - |
| 1928 | 1 | FLORIDA | 0 |
發現可能需要轉換資料型態以及整合年份資料
skimr::skim(PolioLong)
| Name | PolioLong |
| Number of rows | 111384 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| State | 0 | 1 | 4 | 20 | 0 | 51 | 0 |
| value | 0 | 1 | 1 | 5 | 0 | 618 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| YEAR | 0 | 1 | 1948.5 | 12.12 | 1928 | 1938.00 | 1948.5 | 1959.00 | 1969 | ▇▇▇▇▇ |
| WEEK | 0 | 1 | 26.5 | 15.01 | 1 | 13.75 | 26.5 | 39.25 | 52 | ▇▇▇▇▇ |
發現value欄位(發生率)是文字,需要轉換成數字
PolioLong$value<-as.numeric(PolioLong$value)
## Warning: NAs introduced by coercion
因有Warning,需查看原因,發現是-無法轉換成數字,合理
轉換好後重新查看資料型態是否正確
skimr::skim(PolioLong)
| Name | PolioLong |
| Number of rows | 111384 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| numeric | 3 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| State | 0 | 1 | 4 | 20 | 0 | 51 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| YEAR | 0 | 1.00 | 1948.50 | 12.12 | 1928 | 1938.00 | 1948.50 | 1959.00 | 1969.00 | ▇▇▇▇▇ |
| WEEK | 0 | 1.00 | 26.50 | 15.01 | 1 | 13.75 | 26.50 | 39.25 | 52.00 | ▇▇▇▇▇ |
| value | 29866 | 0.73 | 0.22 | 0.61 | 0 | 0.00 | 0.03 | 0.16 | 33.08 | ▇▁▁▁▁ |
因資料內是各週的值,但我們做圖是用年份表示,需要整合。 而發生率的計算應須加總
library(dplyr)
PolioLongYear<-PolioLong %>% group_by(YEAR,State) %>%
summarise(Year_inc=sum(value,na.rm = T))
查看整合成每年發生率的資料長相
PolioLongYear
| YEAR | State | Year_inc |
|---|---|---|
| 1928 | ALABAMA | 2.39 |
| 1928 | ALASKA | 0.00 |
| 1928 | ARIZONA | 2.61 |
| 1928 | ARKANSAS | 0.52 |
| 1928 | CALIFORNIA | 5.04 |
| 1928 | COLORADO | 7.04 |
| 1928 | CONNECTICUT | 4.53 |
| 1928 | DELAWARE | 3.44 |
| 1928 | DISTRICT OF COLUMBIA | 6.92 |
| 1928 | FLORIDA | 1.47 |
library(ggplot2)
ggplot(PolioLongYear,aes(x=YEAR,y=State,fill=Year_inc))+
geom_tile()+
scale_fill_gradient(low="white",high = "steelblue")
發現背景很干擾,因此改成白色,並做其他細修。
scale_fill_gradientntheme_minimal(base_size = 9)theme_minimal(base_line_size = 0)geom_vline(xintercept = 1955)、geom_text(x=1962,y="WYOMING",label="Vaccine introduced")labs(x="Year",y="States",fill="Incidence")ggplot(PolioLongYear,aes(x=YEAR,y=State,fill=Year_inc))+
geom_tile(color="white")+
scale_fill_gradientn(colors=c("white","steelblue","seagreen3","yellow3","red4"),
values = c(0,0.05,0.1,0.15,0.2,1))+
theme_minimal(base_line_size = 0,base_size = 9)+
labs(x="Year",y="States",fill="Incidence")+
geom_vline(xintercept = 1955)+
geom_text(x=1962,y="WYOMING",label="Vaccine introduced")
如果不想調小字,但又不想要Y軸字會重疊的話,可設定名稱交錯。scale_y_discrete(guide = guide_axis(n.dodge=2))
但此法不適合用在Y軸,X軸是比較適合的使用情境。
ggplot(PolioLongYear,aes(x=YEAR,y=State,fill=Year_inc))+
geom_tile(color="white")+
scale_fill_gradientn(colors=c("white","steelblue","seagreen3","yellow3","red4"),
values = c(0,0.05,0.1,0.15,0.2,1))+
theme_minimal(base_line_size = 0)+
scale_y_discrete(guide = guide_axis(n.dodge=2))+
labs(x="Year",y="States",fill="Incidence")+
geom_vline(xintercept = 1955)+
geom_text(x=1962,y="WYOMING",label="Vaccine introduced")
heatmap<-
ggplot(PolioLongYear,aes(x=YEAR,y=State,fill=Year_inc))+
geom_tile(color="white")+
scale_fill_gradient(low="white",high = "red")+
theme_minimal(base_line_size = 0)+
labs(x="Year",y="States",fill="Incidence")+
geom_vline(xintercept = 1955)+
geom_text(x=1962,y="WYOMING",label="Vaccine introduced")
library(plotly)
ggplotly(heatmap)